
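/*
Options of get_bad_dummies:
thr: dollar threshold applied to each income-type discrepancy
cred_thr: dollar threshold applied to each credit type
cumul_thr: threshold on the overall sum of income-type discrepancies. NOTE THAT THIS IS VESTIGIAL.
*/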

/*
get_bad_dummies: build one 0/1 flag variable (bad_*) per return-level
discrepancy check on the data in memory, then collapse them into bad_any.

Options
	thr(real 500)       threshold used by the income-type checks
	cred_thr(real 100)  threshold used by the credit-type checks
	cumul_thr(real 1000) accepted for backward compatibility; not used below
	dropvars(string)    variables (typically bad_* flags) to drop before
	                    bad_any is computed, so they are excluded from it

Side effects on the data in memory
	- creates cumul_error (running sum of the discrepancy amounts)
	- creates the bad_* flags and bad_any
	- renames cdw_t_unkn -> bad_t_unkn and cdw_early_roth -> bad_early_roth
	- expects bad_deps to exist already; its missing values are set to 0

NOTE: Stata orders missing values above every number, so a check of the
form `expr > threshold' evaluates to 1 when expr is missing, and
cumul_error becomes missing once any component is missing.
*/
cap prog drop get_bad_dummies
prog define get_bad_dummies
syntax, [thr(real 500) cumul_thr(real 1000) cred_thr(real 100) ///
	dropvars(string)]

	// the set of non-dollar-related conditions
	// (kept for reference; this local is not used anywhere below)
	local non_dollar fil_stat deps t_unkn early_roth itemizer ///
		educ depcare res_energ

	// running total of the absolute discrepancies accumulated below
	gen cumul_error = 0


	// filing status mismatch
	// the replace statements below override the raw comparison in sequence,
	// so their order matters
	gen bad_fil_stat = cdw_filing_status ~= soi_fil_stat
	qui replace bad_fil_stat = 0 if soi_fil_stat == 1 & inrange(cdw_filing_status,1,4) == 0
	qui replace bad_fil_stat = 1 if cdw_tu_match == 0 & cdw_nonfiler_x == 0
	qui replace bad_fil_stat = 0 if cdw_nonfiler_x == 0 & inlist(soi_fil_stat,1,4)


	// Dependent problem // bad_deps already defined
	qui replace bad_deps = 0 if missing(bad_deps)

	// EITC dependent problem: bad_eitc_deps already defined

	// Pension taxable amount unknown
	// (renamed so that the bad_* wildcard at the end picks it up)
	rename cdw_t_unkn bad_t_unkn


	// Early Roth IRA distribution
	rename cdw_early_roth bad_early_roth

	// Discrepancy between 1099-R taxable and reported pensions/IRAs
	gen bad_txbl_pens = abs(cdw_pensions - soi_pens_anns_txbl - soi_ira_dist_txbl)>`thr'
	qui replace cumul_error = cumul_error + abs(cdw_pensions - soi_pens_anns_txbl - soi_ira_dist_txbl)


	// Non-capital gains/losses
	gen bad_noncap_gains = abs(soi_other_gains_losses) > `thr'
	qui replace cumul_error = cumul_error + abs(soi_other_gains_losses)


	// Capital gains not reported on Form 1099-B
	gen bad_cg = abs(soi_sched_d_line2h) + abs(soi_sched_d_line3h) + ///
		abs(soi_sched_d_line9h) + abs(soi_sched_d_line10h) > `thr'
	qui replace cumul_error = cumul_error + abs(soi_sched_d_line2h) + abs(soi_sched_d_line3h) + ///
		abs(soi_sched_d_line9h) + abs(soi_sched_d_line10h)

	// itemizer dummy 
	gen bad_itemizer = soi_fded == 1

	// Schedule C discrepancy
	gen bad_sched_c = abs(soi_sched_c_netincm - cdw_nec) > `thr'
	qui replace cumul_error = cumul_error +  abs(soi_sched_c_netincm - cdw_nec)

	// Farmer
	gen bad_sched_f = abs(soi_sched_f_netincm) > `thr'
	qui replace cumul_error = cumul_error + abs(soi_sched_f_netincm)


	// Rents and royalties
	gen bad_rents_royal = abs(soi_sched_e_net_rent_rylty - cdw_f1065_royal - cdw_f1120s_royal) > `thr' // abs(soi_sched_e_rents + soi_sched_e_royalties) > 100
	qui replace cumul_error = cumul_error + abs(soi_sched_e_net_rent_rylty - cdw_f1065_royal - cdw_f1120s_royal)

	// partnership income not subject to SECA [TO BE FILLED IN (or skipped)]

	// Pass-through income != K1 amounts
	gen bad_pt = abs(soi_sched_e_prtscp_incm - soi_sched_e_prtscp_loss ///
		- cdw_f1120s_ord - cdw_f1065_ord) > `thr'
	qui replace cumul_error = cumul_error + abs(soi_sched_e_prtscp_incm - soi_sched_e_prtscp_loss ///
		- cdw_f1120s_ord - cdw_f1065_ord)

	// QBI deduction [doing this only at the high end, to not have to deal with taxable income limitation]

	// predicted deduction: 20% of cdw_div_199a plus 20% of the
	// (floored at zero) net business-income expression below
	gen pred_qbi_ded = 0.2*cdw_div_199a + 0.2*max(0, soi_sched_f_netincm + soi_sched_e_prtscp_incm - soi_sched_e_prtscp_loss + ///
			soi_sched_c_netincm - soi_seca_tax/2 - soi_se_health_ins_ded-soi_se_qual_ret_plans)

	// the flag is restricted to soi_txbl_incm >= 100000, but the
	// contribution to cumul_error is not
	gen bad_qbi_ded = abs(pred_qbi_ded - soi_qbi_ded) > `thr' & soi_txbl_incm >= 100000
	qui replace cumul_error = cumul_error + abs(pred_qbi_ded - soi_qbi_ded)
	drop pred_qbi_ded

		

		
	// NIIT discrepancies [TO BE FILLED IN, or skipped]

	// bad education
	gen bad_educ = soi_qual_educ_expenses > `cred_thr'

	// child care credit
	qui gen bad_depcare = soi_chld_dep_care_credit > `cred_thr'

	// residential energy credit
	gen bad_res_energ = soi_res_energy_credit > `cred_thr'

	// certain above-the-line deductions
	gen bad_aboveline = soi_educator_expenses + soi_moving_expenses + soi_se_health_ins_ded > `thr'
	qui replace cumul_error = cumul_error + soi_educator_expenses + soi_moving_expenses + soi_se_health_ins_ded

	// alimony
	gen bad_alimony = soi_alimony_paid > `thr' | soi_alimony_received > `thr'
	qui replace cumul_error = cumul_error + soi_alimony_paid + soi_alimony_received

	
	/* Other line-by-line mess-ups */
	
	gen bad_wages = abs(soi_wages - cdw_wages_prim - cdw_wages_sec) > `thr'
	qui replace cumul_error = cumul_error + abs(soi_wages - cdw_wages_prim - cdw_wages_sec)

	// FIX: accumulate the interest discrepancy itself; previously the
	// qualified-dividend discrepancy was added here (and again below),
	// so dividends were double-counted and interest was never counted.
	gen bad_int = abs(soi_interest_txbl - cdw_intrec ) > `thr'
	qui replace cumul_error = cumul_error + abs(soi_interest_txbl - cdw_intrec)

	gen bad_qual_divs = abs(soi_qual_divs - cdw_qdiv) > `thr'
	qui replace cumul_error = cumul_error + abs(soi_qual_divs - cdw_qdiv)
	

	// any bad
	
	// Drop the caller-excluded variables one token at a time: a single
	// `drop a b c' aborts without dropping anything if any one name is not
	// found, which (under capture) would silently leave every listed flag
	// in bad_any. An empty dropvars() is a no-op.
	foreach v of local dropvars {
		cap drop `v'
	}
	
	// 1 if any remaining bad_* flag is 1
	egen bad_any = rowmax(bad_*)
	
end


